from urllib.parse import urljoin

import requests
from lxml import etree

# Crawl the chapter index at ``start_url``, download every linked chapter,
# concatenate the paragraph text of all chapters and write it to a file.

headers = {
    "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Edg/88.0.705.50"
}

start_url = 'https://www.ppzuowen.com/book/en/fuermosiyingwenban/'

# The pages are decoded as GBK (the previous revision re-decoded every
# extracted string from ISO-8859-1 to GBK, which only works when requests
# happened to fall back to ISO-8859-1).
PAGE_ENCODING = 'gbk'
# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 10

TITLE_XPATH = '/html/body/div[6]/div[1]/h2/text()'
PARAGRAPH_XPATH = '/html/body/div[6]/div[1]/div[1]/p'
# Paragraph text is looked up directly under <p> first, then under
# <p><strong>; one strategy must work for every paragraph of a chapter.
PARAGRAPH_TEXT_XPATHS = ('./text()', './strong/text()')


def _fetch_html(url):
    """Download *url* and return its body decoded as ``PAGE_ENCODING``.

    Raises:
        requests.RequestException: on connection errors, timeouts, or an
            HTTP error status.
    """
    response = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # Decode once at the I/O boundary instead of per extracted string.
    response.encoding = PAGE_ENCODING
    return response.text


# --- Collect chapter URLs from the index page -------------------------------
start_text = _fetch_html(start_url)
start_tree = etree.HTML(start_text)
start_list = start_tree.xpath('/html/body/div[6]/div[1]/div/ul/li')

url_list = []
for item in start_list:
    hrefs = item.xpath('./a/@href')
    if hrefs:  # skip list items that carry no link
        # urljoin handles both site-absolute and relative hrefs.
        url_list.append(urljoin(start_url, hrefs[0]))

# --- Download and extract every chapter -------------------------------------
fail_list = []
chapter_parts = []
for url in url_list:
    try:
        tree = etree.HTML(_fetch_html(url))
    except requests.RequestException as exc:
        # One unreachable chapter must not abort the whole crawl.
        print(url + '采集失败')
        fail_list.append(url + '  ' + str(exc))
        continue

    if tree is None:
        # NOTE(review): guards against a parse that yields no root element.
        print(url + '采集失败')
        fail_list.append(url + '  ')
        continue

    titles = tree.xpath(TITLE_XPATH)
    title = titles[0] if titles else ''
    label = title or url  # fall back to the URL when the page has no title

    content_list = tree.xpath(PARAGRAPH_XPATH)
    for text_xpath in PARAGRAPH_TEXT_XPATHS:
        try:
            # Build the chapter locally so a strategy that fails half-way
            # leaves no partial or duplicated text in the final output.
            paragraphs = [p.xpath(text_xpath)[0] for p in content_list]
        except IndexError:
            continue  # some paragraph had no match; try the next strategy
        chapter_parts.extend(paragraphs)
        print(label + '采集成功')
        break
    else:
        print(label + '采集失败')
        fail_list.append(url + '  ' + title)

# Paragraphs are concatenated without a separator, as before.
content = ''.join(chapter_parts)

print(fail_list)
print(content)

file_name = 'home_data'
# Explicit encoding so the write does not depend on the platform default.
with open(file_name, 'w', encoding='utf-8') as fp:
    fp.write(content)
